home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power Programmierung
/
Power-Programmierung (Tewi)(1994).iso
/
magazine
/
pctchnqs
/
1991
/
number4
/
tokenize.pas
< prev
next >
Wrap
Pascal/Delphi Source File
|
1991-06-03
|
5KB
|
159 lines
UNIT Tokenizer; { Turbo Pascal 4.0-up; QuickPascal 1.0 }
INTERFACE
USES Dos, FNameHax, Back1Hax;
TYPE Tkind =
(newline,whitespace,symbol,number,
qstring,bracom,eolcom,special);
CONST KindName:
ARRAY[Tkind] OF string[10] =
('newline','whitespace','symbol','number',
'qstring','bracom','eolcom','special');
VAR KindOfToken: Tkind;
PROCEDURE ReadToken(VAR f:text; VAR t:string);
{ - Reads a token from f into t, consuming
exactly the characters that make up the token.
- Sets global variable KindOfToken to the kind of token read.
- KindName[KindOfToken] is then a printable description
of the kind of token. }
IMPLEMENTATION
CONST WhiteSpaces: SET OF char = [#0..#9,#11..#12,#14..' '];
Alphabetics: SET OF char = ['a'..'z','A'..'Z','_'];
Digits: SET OF char = ['0'..'9'];
PROCEDURE ReadToken(VAR f:text; VAR t:string);
LABEL OneTooMany, AllDone;
VAR c, quote, prev: Char;
BEGIN
read(f,c); t:=c; { Get first char of token }
IF (c=^M) THEN { NEWLINE - Type 1 }
BEGIN { ^M^J or just ^M }
KindOfToken := newline;
read(f,c); t:=t+c; { Look at the next character }
IF c=^J THEN { If it's ^J, it belongs to this token }
GOTO AllDone { so keep it }
ELSE
GOTO OneTooMany { Otherwise we've read one too many }
END
ELSE IF (c=^J) THEN { NEWLINE - Type 2 }
BEGIN { ^J by itself }
KindOfToken := newline;
GOTO AllDone
END
ELSE IF c IN WhiteSpaces THEN { WHITESPACE }
BEGIN
KindOfToken := whitespace;
REPEAT
read(f,c); t:=t+c
UNTIL NOT (c IN WhiteSpaces);
GOTO OneTooMany
END
ELSE IF c IN Alphabetics THEN { SYMBOL }
BEGIN
KindOfToken := symbol;
REPEAT
read(f,c); t:=t+c
UNTIL (NOT (c IN Alphabetics)) AND (NOT (c in Digits));
GOTO OneTooMany
END
ELSE IF (c='''') OR (c='"') THEN { QUOTED STRING }
BEGIN
KindOfToken := qstring;
quote := c;
REPEAT
read(f,c); t:=t+c;
IF (c=quote) THEN
IF NextChar(f) <> quote THEN { found end of string }
GOTO AllDone
ELSE { it's a quote written double; grab it }
BEGIN
read(f,c); t:=t+c
END
UNTIL eoln(f);
FileError(f,'Expecting '+quote+', found end of line')
END
ELSE IF (c IN Digits) OR
((c='.') AND (NextChar(f) IN Digits)) THEN { NUMBER }
BEGIN
KindOfToken := number;
IF c<>'.' THEN
REPEAT
read(f,c); t:=t+c { Some digits... }
UNTIL NOT (c IN Digits);
IF c='.' THEN { Decimal point... }
REPEAT
read(f,c); t:=t+c { More digits... }
UNTIL NOT (c IN Digits);
IF (c='E') or (c='e') THEN { E for exponent... }
BEGIN
read(f,c); t:=t+c;
IF (c IN Digits) OR
(c='+') OR (c='-') THEN { +, -, or digit }
REPEAT
read(f,c); t:=t+c { Digits of exponent }
UNTIL NOT (c IN Digits)
END;
GOTO OneTooMany
END
ELSE IF c='{' THEN { BRACKETED COMMENT - Type 1 }
BEGIN
KindOfToken := bracom;
t:=' '; { Return a blank to the calling program }
REPEAT
read(f,c);
IF c='}' THEN GOTO AllDone
UNTIL eof(f);
FileError(f,'Expecting ''}'', found end of file')
END
ELSE IF (c='(') AND
(NextChar(f)='*') THEN { BRACKETED COMMENT - Type 2 }
BEGIN
KindOfToken := bracom;
t:=' '; { Return a blank to the calling program }
REPEAT
prev:=c; read(f,c);
IF (c=')') AND (prev='*') THEN GOTO AllDone
UNTIL eof(f);
FileError(f,'Expecting ''*)'', found end of file')
END
ELSE IF c='%' THEN { COMMENT TO END OF LINE }
BEGIN
KindOfToken := eolcom;
REPEAT
read(f,c); t:=t+c
UNTIL (c=^M) OR (c=^J);
GOTO OneTooMany
END
ELSE { THE CHARACTER BY ITSELF IS A TOKEN }
BEGIN
KindOfToken := special;
GOTO AllDone
END;
OneTooMany: { Jump here if an extra character has been read }
back1char(f);
delete(t,length(t),1);
AllDone: { Jump here if nothing else needs to be done }
END;
END. { Tokenizer }